################################################################################-
# Replication File for Wratil, Wäckerle and Proksch: Government Rhetoric and the 
# Representation of Public Opinion in International Negotiations
#
# This script implements general recodings of the raw dataset, such as excluding speeches 
# of the Commission, speeches without votes etc. It also adds many of the 
# independent variables.
#
# The CMP dataset cannot be copied to the Dataverse (due to terms of use) and is therefore
# not included in the replication data. In order to recreate the data, please
# go to https://wratil.eu/data and rerun his script to create the file 
# 'parties_cmp2019_online_2.2.dta' and include it in the data folder.
# Unfortunately, this means that this script will not run without this step.
# However, you can just skip this script and go directly to Script #2 and
# work with our pre-loaded dataset.
#
# Additionally, this script produces the following graphs and tables:
# Figure 1
# Figure A2
# Figure A3
# Table A1
# Table A4
# Table F1
# Table J2
################################################################################-

#### Set Up and Load Data ####

R.version

library(tidyverse) # version:1.3.2
library(quanteda)  # version:3.2.1
library(xtable)    # version:1.8-4
library(readxl)    # version:1.4.0

load("data/council_corpus_apsr.RData")

# Add in Eurobarometer Data
eurobarometer <- readstata13::read.dta13("data/eb_responsiveness_apsr.dta")
eurobarometer$nation <- recode(eurobarometer$nation,
                               "bu" = "bg")

# Add in Manifesto Project Data
cmp_positions <- readstata13::read.dta13("data/parties_cmp2019_online_2.2.dta")
cmp_positions <- cmp_positions %>%
  mutate(actor = case_when(
    country == 1 ~ "BE",
    country == 2 ~ "BG",
    country == 3 ~ "CZ",
    country == 4 ~ "DK",
    country == 5 ~ "DE",
    country == 6 ~ "EE",
    country == 7 ~ "IE",
    country == 8 ~ "EL",
    country == 9 ~ "ES",
    country == 10 ~ "FR",
    country == 11 ~ "HR",
    country == 12 ~ "IT",
    country == 13 ~ "CY",
    country == 14 ~ "LV",
    country == 15 ~ "LT",
    country == 16 ~ "LU",
    country == 17 ~ "HU",
    country == 18 ~ "MT",
    country == 19 ~ "NL",
    country == 20 ~ "AT",
    country == 21 ~ "PL",
    country == 22 ~ "PT",
    country == 23 ~ "RO",
    country == 24 ~ "SI",
    country == 25 ~ "SK",
    country == 26 ~ "FI",
    country == 27 ~ "SE",
    country == 28 ~ "UK"
  ))

cmp_positions <- cmp_positions %>% 
  select(-c("gov_planned_economy_cmp_static","gov_planeco_cmp_static",
            "gov_free_market_cmp_static","gov_state_involvement_cmp_static",
            "gov_welfare_cmp_static","gov_markeco_cmp_static"))

eu_budget <- read_xlsx("data/eu_budget_contributions_2016.xlsx")
inflation <- read_xls("data/eurostat_inflation_hicp_2019.xls",sheet = "Ready to load")
unemployment <- read_xls("data/eurostat_unemployment_rate_2019.xls",sheet = "Ready to load")
gov_bonds <- read.csv("data/irt_lt_mcby_m_1_Data.csv")

gov_bonds$year <- substr(gov_bonds$TIME, 1, 4)
gov_bonds$month <- substr(gov_bonds$TIME, 6, 7)
gov_bonds$date <- paste0(gov_bonds$year,"-",gov_bonds$month,"-01") %>% as.Date()
gov_bonds <- gov_bonds %>% 
  mutate(GEO = case_when(
    GEO == "Austria" ~ "AT",
    GEO == "Belgium" ~ "BE",
    GEO == "Bulgaria" ~ "BG",
    GEO == "Croatia" ~ "HR",
    GEO == "Cyprus" ~ "CY",
    GEO == "Czechia" ~ "CZ",
    GEO == "Denmark" ~ "DK",
    GEO == "Estonia" ~ "EE",
    GEO == "Finland" ~ "FI",
    GEO == "France" ~ "FR",
    GEO == "Germany (until 1990 former territory of the FRG)" ~ "DE",
    GEO == "Greece" ~ "EL",
    GEO == "Hungary" ~ "HU",
    GEO == "Ireland" ~ "IE",
    GEO == "Italy" ~ "IT",
    GEO == "Latvia" ~ "LV",
    GEO == "Lithuania" ~ "LT",
    GEO == "Malta" ~ "MT",
    GEO == "Luxembourg" ~ "LU",
    GEO == "Netherlands" ~ "NL",
    GEO == "Poland" ~ "PL",
    GEO == "Portugal" ~ "PT",
    GEO == "Romania" ~ "RO",
    GEO == "Slovakia" ~ "SK",
    GEO == "Slovenia" ~ "SI",
    GEO == "Spain" ~ "ES",
    GEO == "Sweden" ~ "SE",
    GEO == "United Kingdom" ~ "UK",
    TRUE ~GEO
  ))
table(gov_bonds$GEO)

gov_bonds$bond_yields <- as.numeric(gov_bonds$Value)

#-------------------------------#
# Load CHES data
ches_dat <- read_xlsx("data/Cabinet - complete CHES FINAL.xlsx")
ches_join_dat <- data.frame(cabinet_name = unique(ches_dat$cabinet_name),
                            ches_eu_position = NA)

ches_dat$EU_position <- ches_dat$EU_position %>% 
  recode("strongly in favor" = "7")
ches_dat$EU_position <- as.numeric(ches_dat$EU_position)
for(i in 1:nrow(ches_join_dat)){
  ches_dat_temp <- ches_dat %>% 
    filter(cabinet_name==ches_join_dat$cabinet_name[i])
  if(any(is.na(ches_dat_temp$EU_position))){
    cutoff <- 0.8*sum(ches_dat_temp$seats)
    ches_dat_temp_2 <- ches_dat_temp %>% 
      filter(!is.na(EU_position))
    if(sum(ches_dat_temp_2$seats)>cutoff){
      ches_dat_temp <- ches_dat_temp_2
    }
  }
  ches_join_dat$ches_eu_position[i] <- weighted.mean(x = ches_dat_temp$EU_position,
                                                     w = ches_dat_temp$seats)
}

#### Corpus Recodings ####
council_all_final$Presidency_speaker <- ifelse(council_all_final$Presidency==council_all_final$Actor,"Yes","No")
council_all_final$unanimity_any <- ifelse(apply(council_all_final %>% 
                                                  docvars() %>% 
                                                  select(contains("Rule")),      # Create a selection of the corpus with all voting rule variables
                                                MARGIN = 1,                      # Apply function by row
                                                FUN = function(x) length(which(x=="Unanimity")))>0, # Test whether "Unanimity" occurs more than once
                                          "Yes","No")                            # If more than one ote requires unanimity, code 'Yes', otherwise 'No'

council_all_final$Presidency_speaker[council_all_final$Presidency==1] <- "Yes"
table(council_all_final$Presidency_speaker,exclude=NULL)

council_all_final$north_south="Central"
council_all_final$north_south[council_all_final$Actor%in%c("BG","RO","IT",
                                                 "EL","ES","PT","MT","CY")] <- "South"
council_all_final$north_south[council_all_final$Actor%in%c("FI","SE","DK","LT",
                                                 "EE","LV","UK","NL",
                                                 "IE")] <- "North"

table(council_all_final$north_south)

council_all_final$large_small=ifelse(council_all_final$Actor%in%c("DE","FR","IT","UK"),
                                "Large","Small")

table(council_all_final$large_small)
table(council_all_final$Vote1)

# Number of unique debates:
council_all_final %>% 
  docvars() %>% 
  filter(!duplicated(Transcription)) %>% 
  summarise(cases =n())

# Number of speeches in raw corpus:
council_all_final %>% 
  docvars() %>% 
  nrow()

# Number of interventions by the council presidency:
table(council_all_final$Presidency_speaker)

#eliminate all speeches by council presidency
council_all_nopres=corpus_subset(council_all_final,
                                 Presidency_speaker=="No"&
                                   !Actor%in%c("NA","OTH","COM"))

# Number of speeches with missing votes: 82
# Number of speeches withdrawn: 115 + 55 = 170
# Number of speeches of non-member state (Croatia in 2012, 2013): 2
# Number of speeches of member states not participating in voting due to opt-outs: 27+1=28

council_all_nopres %>% 
  docvars() %>% 
  group_by(Vote1) %>% 
  summarise(cases = n()) %>% 
  mutate(prop=cases*100/sum(cases))

# What share of debates had no votes take place? 35.8%
council_all_nopres %>% 
  docvars() %>% 
  filter(!duplicated(Transcription)) %>% 
  mutate(no_votes_took_place = ifelse(Vote1%in%c("MISSING","NOT DECIDED","WITHDRAWAL","Withdrawn")|is.na(Vote1),"Yes","No")) %>% 
  group_by(no_votes_took_place) %>% 
  summarise(cases = n()) %>% 
  mutate(prop=cases*100/sum(cases))

# This is the information in the "Missing Data" section in the Appendix A.3
council_all_nopres %>% 
  docvars() %>% 
  group_by(Vote1) %>% 
  summarise(cases = n()) %>% 
  mutate(prop=cases*100/sum(cases))

# 501 cases pending ('Pending' + 'NOT DECIDED')
# 82 cases missing ('MISSING')
# 170 cases withdrawn ('WITHDRAWAL' + 'Withdrawn')
# 2 Croatia speeches ('No member')
# 28 cases not participating ('Not participating' + 'Not Participating')

# This is Table A4
council_all_nopres %>%
  docvars() %>% 
  filter(Vote1%in%c("No member","Not participating","Not Participating")) %>% 
  mutate(Vote1 = case_when(Vote1=="Not participating" ~ "Not Participating",TRUE ~ Vote1)) %>% 
  select(Vote1,Actor,Council_Config_final,Interinstitutional_Code1) %>% 
  arrange(Vote1) %>% 
  xtable()

council_all_nopres=corpus_subset(council_all_nopres,Vote1%in%c("Abstain","No","Yes"))

# Number of debates with unanimity: 16.5%
council_all_nopres %>% 
  docvars() %>% 
  filter(!duplicated(Transcription)) %>% 
  group_by(unanimity_any) %>% 
  summarise(cases = n())%>%
  mutate(freq = cases / sum(cases))

#create actor_meeting variable for pooling speech by one country in one meeting
docvars(council_all_nopres,"actor_meeting") <- paste(docvars(council_all_nopres,
                                                             "Actor"),
                                                     docvars(council_all_nopres,
                                                             "Council_Meeting"),
                                                     sep="_")

council_all_nopres$budget_any <- ifelse(apply(council_all_nopres %>% 
                                                docvars() %>% 
                                                select(contains("Interinstitutional_Code")) %>% 
                                                mutate_all(.funs=tolower),      # Create a selection of the corpus with all voting rule variables
                                                MARGIN = 1,                      # Apply function by row
                                                FUN = function(x) length(which(grepl("budget",x))))>0, # Test whether "Unanimity" occurs more than once
                                          "Yes","No")                            # If more than one ote requires unanimity, code 'Yes', otherwise 'No'

table(council_all_nopres$budget_any)

# Number of debates on Budget: 13
council_all_nopres %>% 
  docvars() %>% 
  filter(!duplicated(Transcription)) %>% 
  group_by(budget_any) %>% 
  summarise(cases = n())%>%
  mutate(freq = cases / sum(cases))

council_all_nopres %>% 
  docvars() %>% 
  group_by(Council_Config_final) %>% 
  summarise(first_date = min(date_correct),
            last_date = max(date_correct))

#### Descriptive Plots ####

cmp_positions_plot <- cmp_positions %>% 
  filter(date>"2004-01-01"&date<"2018-01-01") %>% 
  filter(grepl("-01$", date)) %>% 
  filter(!is.na(gov_eu_cmp_static)) %>% 
  filter(!(actor=="HR" & date < as.Date("2013-07-01"))) %>% 
  filter(!(actor=="BG" & date < as.Date("2007-01-01"))) %>% 
  filter(!(actor=="RO" & date < as.Date("2007-01-01")))

cmp_positions_plot %>% 
  group_by(actor) %>% 
  summarise(first_case = min(date),
            last_case = max(date)) %>% 
  print(n=40)

# This is Figure A3
cmp_positions_plot %>% 
  mutate(gov_lr_cmp_static = scale(gov_lr_cmp_static)) %>% 
  ggplot(aes(x=gov_lr_cmp_static,y=gov_eu_cmp_static))+
  geom_point()+
  geom_smooth(formula = y~poly(x,2),se=F)+
  theme_bw()+
  labs(x="Government Left-Right Position (z-score)",
       y="Government Anti-Pro EU Position")
ggsave("figures_appendix/figure_a_3.eps", width = 6, height = 4, units = "in")

dates.meetings <- docvars(council_all_nopres) %>% 
  select(date_correct) %>% 
  distinct()

dates.meetings <- dates.meetings %>% 
  mutate(date = as.Date(paste0(format(date_correct,"%Y-%m"),"-01"))) %>% 
  group_by(date) %>% 
  summarise(number_meetings = n())

mean_eu_position_plot <- cmp_positions_plot  %>% 
  group_by(date) %>% 
  summarise(mean_eu = mean(gov_eu_cmp_static,na.rm=TRUE),
            sd_eu = sd(gov_eu_cmp_static,na.rm=TRUE)) %>% 
  mutate(inside = ifelse(date<as.Date("2010-01-01")|date>as.Date("2016-12-31"),"1","0")) %>% 
  ggplot(aes(x=date,y=mean_eu))+
  expand_limits(y=2.75)+
  annotate("text", x = as.Date("2013-07-02"), y = 2.6, label = "Period covered in dataset")+
  #  annotate("segment", x = as.Date("2010-01-01"), xend = as.Date("2016-12-31"), y = 2.45, yend = 2.45,
  #           arrow = arrow(ends = "both"))+
  geom_vline(xintercept=as.Date("2010-01-01"),lty="dashed")+
  geom_vline(xintercept=as.Date("2016-12-31"),lty="dashed")+
  geom_line()+
  theme_bw()+
  theme(legend.position = "none")+
  labs(x="",y="Average Government \nPosition on EU",
       title="b) Average Government Position on the EU in the Council")+
  scale_x_date(date_breaks = "1 year", date_labels =  "%Y") 

months_dates_observations <- dates.meetings %>% 
  ggplot(aes(x=date,y=number_meetings))+
  annotate("text", x = as.Date("2013-07-02"), y = 7.5, label = "Period covered in dataset")+
  #  annotate("segment", x = as.Date("2010-01-01"), xend = as.Date("2016-12-31"), y = 7, yend = 7,
  #           arrow = arrow(ends = "both"))+
  expand_limits(y=8,x=c(as.Date("2004-01-01"),as.Date("2018-01-01")))+
  geom_vline(xintercept=as.Date("2010-01-01"),lty="dashed")+
  geom_vline(xintercept=as.Date("2016-12-31"),lty="dashed")+
  geom_col()+
  theme_bw()+
  theme(legend.position = "none")+
  labs(x="",y="# of meetings \nper month",title="a) Council Meetings Covered in Dataset")+
  scale_x_date(date_breaks = "1 year", date_labels =  "%Y") 

cmp_positions_plot$gov_eu_supporter <- ifelse(cmp_positions_plot$gov_eu_cmp_static>0,
                                              "Europhile Government",
                                              "Eurosceptic Government")

which.eurosceptic <- cmp_positions_plot %>% 
  filter(gov_eu_supporter=="Eurosceptic Government") %>% 
  mutate(gov_date = paste(actor,date,sep="_"))

eb_positions_plot <- eurobarometer %>% 
  filter(start_date>"2004-01-01"&start_date<"2018-01-01") %>% 
  filter(grepl("-01$", start_date)) %>% 
  filter(!is.na(image_ipol)) %>% 
  mutate(nation = toupper(nation)) %>% 
  filter(!(nation=="HR" & start_date < as.Date("2013-07-01"))) %>% 
  filter(!(nation=="BG" & start_date < as.Date("2007-01-01"))) %>% 
  filter(!(nation=="RO" & start_date < as.Date("2007-01-01"))) %>% 
  mutate(gov_date = paste(nation,start_date,sep="_")) %>% 
  mutate(gov_eurosceptic = ifelse(gov_date %in% which.eurosceptic$gov_date,"Yes","No"))

table(eb_positions_plot$gov_eurosceptic)

range(eb_positions_plot$start_date)

mean_eb_image_plot <- eb_positions_plot  %>% 
  group_by(start_date,gov_eurosceptic) %>% 
  summarise(mean_eu = mean(image_ipol,na.rm=TRUE),
            sd_eu = sd(image_ipol,na.rm=TRUE)) %>% 
  mutate(is_inside = ifelse(start_date<as.Date("2010-01-01")|start_date>as.Date("2016-12-31"),"1","0")) %>% 
  ggplot(aes(x=start_date,y=mean_eu,lty=gov_eurosceptic))+
  geom_vline(xintercept=as.Date("2010-01-01"),lty="dashed")+
  geom_vline(xintercept=as.Date("2016-12-31"),lty="dashed")+
  expand_limits(y=4.1,x=c(as.Date("2004-01-01"),as.Date("2018-01-01")))+
  annotate("text", x = as.Date("2013-07-02"), y = 3.95, label = "Period covered in dataset")+
  geom_line()+
  theme_bw()+
  theme(legend.position = "bottom")+
  scale_color_discrete(guide = guide_legend())+
  guides(color = guide_legend(title.position = "top"))+
  labs(x="",y="Average Public \nImage of EU",
       title="c) Average Public Image of the EU",
       color = "Eurosceptic Government",
       lty = "Eurosceptic Government")+
  scale_x_date(date_breaks = "1 year", date_labels =  "%Y") 

# This is Figure 1
ggpubr::ggarrange(months_dates_observations,
                  mean_eu_position_plot,
                  mean_eb_image_plot,
                  common.legend = TRUE, 
                  legend="bottom",
                  ncol=1)
ggsave("figures_main_paper/figure_1.eps", width = 6, height = 7, units = "in")

tiff("figures_main_paper/figure_1.tiff", units="in", width=6, height=7, res=1000)
ggpubr::ggarrange(months_dates_observations,
                  mean_eu_position_plot,
                  mean_eb_image_plot,
                  common.legend = TRUE, 
                  legend="bottom",
                  ncol=1)
dev.off()

eb_positions_plot  %>% 
  mutate(period = case_when(
    start_date<"2010-01-01" ~ "before",
    start_date>"2016-12-31" ~ "after",
    TRUE ~ "during"
  )) %>% 
  group_by(period) %>% 
  summarise(mean_eu = mean(image_ipol,na.rm=TRUE),
            sd_eu = sd(image_ipol,na.rm=TRUE)) 

distribution_public_image_plot <- eb_positions_plot  %>% 
  mutate(period = case_when(
    start_date<"2010-01-01" ~ "Before",
    start_date>"2016-12-31" ~ "After",
    TRUE ~ "Observation Period"
  )) %>% 
  ggplot(aes(x=image_ipol,color=period,lty=period)) +
  geom_density()+
  theme_bw()+
  theme(legend.position = "bottom")+
  scale_color_discrete(guide = guide_legend())+
  guides(color = guide_legend(title.position = "top"))+
  labs(x="",y="Distribution of \nPublic Image of EU",
       color = "Observation Period",
       lty = "Observation Period")

# This is Figure A2
distribution_public_image_plot
ggsave("figures_appendix/figure_a_2.eps", width = 6, height = 3.5, units = "in")

############################################-
#### Short titles, handcoded Appendix F ####

debate_titles <- read.csv("data/bill_titles_coded.csv") %>% 
  filter(delete=="no") %>% 
  filter(!duplicated(short.name)) %>% 
  select(short.name,Council_Config_final) %>% 
  arrange(Council_Config_final,short.name)

debate_titles$Council_Config_final <- debate_titles$Council_Config_final %>% 
  recode("Justice and Home Affairs" = "JHA",
         "Economic and Financial Affairs" = "Ecofin",
         "Employment, Social Policy, Health and Consumer Affairs" = "EPSCO")

# This is Table F1
xtable::xtable(debate_titles) %>% 
  print(include.rownames=FALSE)

####################################-
#### Join external data to corpus ####

docvars.save <- docvars(council_all_nopres)

docvars.save$year=format(docvars.save$date_correct,"%Y")
docvars.save$actor_year <- paste(docvars.save$Actor,docvars.save$year,sep="_")

debate_days <- docvars.save %>% 
  group_by(Interinstitutional_Code1) %>% 
  summarise(number_meetings = n_distinct(date_correct))

prop.table(table(debate_days$number_meetings==1))

#-------------------------------#
# ... Government Bonds ####
docvars.save$actor_month <- paste(docvars.save$Actor,
                                  paste0(format(docvars.save$date_correct,"%Y-%m"),"-01"),sep="_")
gov_bonds$actor_month <- paste(gov_bonds$GEO,
                               gov_bonds$date,sep="_")
docvars.save <- left_join(docvars.save,gov_bonds %>% 
                            select(-"GEO",-"INT_RT",-"date",-"year",-"month",-"TIME",-"Value",
                                   -"Flag.and.Footnotes"),by="actor_month")

#-------------------------------#
# ... CMP ####
docvars.save$actor_date <- paste(docvars.save$Actor,
                                 docvars.save$date_correct,sep="_")
cmp_positions$actor_date <- paste(cmp_positions$actor,
                                  cmp_positions$date,sep="_")
docvars.save <- left_join(docvars.save,cmp_positions %>% 
                            select(-"country",-"actor",-"date"),by="actor_date")

docvars.save$gov_eu_supporter <- ifelse(docvars.save$gov_eu_cmp_static>0,
                                    "Europhile Government",
                                    "Eurosceptic Government")
docvars.save$gov_lr_cmp_static <- as.numeric(scale(docvars.save$gov_lr_cmp_static))

docvars.save <- docvars.save %>% 
  dplyr::rename("gov_lr_cmp_static_scaled" = "gov_lr_cmp_static")

#-------------------------------#
# ... CHES ####

docvars.save <- docvars.save %>% 
  left_join(ches_join_dat)

docvars.save$gov_eu_supporter_ches <- ifelse(docvars.save$ches_eu_position>=4.5,
                                         "Europhile Government",
                                         "Eurosceptic Government")


docvars.save <- docvars.save %>% 
  mutate(ches_cmp_comb = case_when(
    gov_eu_supporter_ches == "Europhile Government" & gov_eu_supporter == "Europhile Government" ~ "Europhile Government",
    gov_eu_supporter_ches == "Europhile Government" & gov_eu_supporter == "Eurosceptic Government" ~ "Eurosceptic Government",
    gov_eu_supporter_ches == "Eurosceptic Government" & gov_eu_supporter == "Europhile Government" ~ "Eurosceptic Government",
    gov_eu_supporter_ches == "Eurosceptic Government" & gov_eu_supporter == "Eurosceptic Government" ~ "Eurosceptic Government",
    gov_eu_supporter_ches == "Eurosceptic Government" & is.na(gov_eu_supporter) ~ "Eurosceptic Government",
    gov_eu_supporter == "Eurosceptic Government" & is.na(gov_eu_supporter_ches) ~ "Eurosceptic Government",
    gov_eu_supporter_ches == "Europhile Government" & is.na(gov_eu_supporter) ~ "Europhile Government",
    gov_eu_supporter == "Europhile Government" & is.na(gov_eu_supporter_ches) ~ "Europhile Government"
  ))

cabinet_data <- docvars.save %>% 
  select(cabinet_name,Actor,date_correct,gov_eu_cmp_static) %>% 
  group_by(Actor,cabinet_name) %>% 
  summarise(start_date = min(date_correct),
            end_date = max(date_correct),
            mean_eu = mean(gov_eu_cmp_static)) %>% 
  arrange(Actor, start_date)

out <- docvars.save %>% 
  filter(gov_eu_supporter=="Eurosceptic Government") %>% 
  select(cabinet_name,Actor,date_correct) %>% 
  group_by(Actor,cabinet_name) %>% 
  summarise(start_date = min(date_correct),
            end_date = max(date_correct)) %>% 
  mutate(Actor = 
           case_when(Actor == "CY" ~ "Cyprus",
                     Actor == "DK" ~ "Denmark",
                     Actor == "EL" ~ "Greece",
                     Actor == "HU" ~ "Hungary",
                     Actor == "IT" ~ "Italy",
                     Actor == "UK" ~ "United Kingdom")) %>% 
  mutate(start_date = as.character(start_date),
         end_date = as.character(end_date)) %>% 
  dplyr::rename("Name of Cabinet" = "cabinet_name",
                "First Speech in Dataset" = "start_date",
                "Last Speech in Dataset" = "end_date",
                "Country" = "Actor"
  )

# This is Table A1
xtable::xtable(out)

out_combined <- docvars.save %>% 
  filter(ches_cmp_comb=="Eurosceptic Government") %>% 
  select(cabinet_name,Actor,date_correct,ches_eu_position,gov_eu_cmp_static) %>% 
  group_by(Actor,cabinet_name) %>% 
  summarise(start_date = min(date_correct),
            end_date = max(date_correct),
            ches_position = mean(ches_eu_position),
            cmp_position = mean(gov_eu_cmp_static)) %>% 
  mutate(Actor = 
           case_when(Actor == "CY" ~ "Cyprus",
                     Actor == "CZ" ~ "Czech Republic",
                     Actor == "DK" ~ "Denmark",
                     Actor == "EL" ~ "Greece",
                     Actor == "FI" ~ "Finland",
                     Actor == "HU" ~ "Hungary",
                     Actor == "IT" ~ "Italy",
                     Actor == "NL" ~ "Netherlands",
                     Actor == "PL" ~ "Poland",
                     Actor == "UK" ~ "United Kingdom")) %>% 
  mutate(start_date = as.character(start_date),
         end_date = as.character(end_date)) %>% 
  dplyr::rename("Name of Cabinet" = "cabinet_name",
                "First Speech in Dataset" = "start_date",
                "Last Speech in Dataset" = "end_date",
                "Country" = "Actor",
                "EU Position in CHES" = "ches_position",
                "EU Position in CMP" = "cmp_position"
  )

# This is Table J2
xtable::xtable(out_combined)

#-------------------------------#
# ... Eurobarometer ####

eurobarometer$actor_date <- paste(toupper(eurobarometer$nation),
                                  eurobarometer$start_date,sep="_")

docvars.save <- left_join(docvars.save,eurobarometer %>% 
                            select(actor_date,image_lag6m),by="actor_date")
docvars.save$image_lag6m <- as.numeric(scale(docvars.save$image_lag6m))

docvars.save <- docvars.save %>% 
  dplyr::rename("image_lag6m_scaled" = "image_lag6m")

#-------------------------------#
# ... Unemployment ####
names(unemployment)[1] <- "government"
unemployment$government=toupper(unemployment$government)
unemployment$actor_year <- paste(unemployment$government,
                                 unemployment$year,sep="_")
docvars.save <- left_join(docvars.save,unemployment %>%
                            select(-"government",-"year"),by="actor_year")
docvars.save$unemployment <- as.numeric(scale(docvars.save$unemployment))
docvars.save <- docvars.save %>% 
  dplyr::rename(unemployment_scaled = unemployment)

#-------------------------------#
# ... Inflation ####
names(inflation)[1] <- "government"
inflation$government=toupper(inflation$government)
inflation$actor_year <- paste(inflation$government,
                              inflation$year,sep="_")
docvars.save <- left_join(docvars.save,inflation %>%
                            select(-"government",-"year"),by="actor_year")
docvars.save$inflation <- as.numeric(scale(docvars.save$inflation))
docvars.save <- docvars.save %>% 
  dplyr::rename(inflation_scaled = inflation)

#-------------------------------#
# ... EU budget ####
names(eu_budget)[1] <- "year"
eu_budget$government=toupper(eu_budget$government)
eu_budget$actor_year <- paste(eu_budget$government,eu_budget$year,sep="_")
docvars.save <- left_join(docvars.save,eu_budget %>%
                            select(-"government",-"year",-"eu_cont_pc",-"eu_cont_total"),by="actor_year")

docvars.save$eu_cont_gdp <- as.numeric(scale(docvars.save$eu_cont_gdp))
docvars.save <- docvars.save %>% 
  dplyr::rename(eu_receipts_gdp_scaled = eu_cont_gdp)

docvars.save$number_words <- ntoken(council_all_nopres)
range(docvars.save$number_words)
names(docvars.save)

docvars.save$Interinstitutional_Code1[docvars.save$Interinstitutional_Code1==""] <- NA
docvars.save$Interinstitutional_Code2[docvars.save$Interinstitutional_Code2==""] <- NA
docvars.save$Interinstitutional_Code3[docvars.save$Interinstitutional_Code3==""] <- NA
docvars.save$Interinstitutional_Code4[docvars.save$Interinstitutional_Code4==""] <- NA
docvars.save$Interinstitutional_Code5[docvars.save$Interinstitutional_Code5==""] <- NA
docvars.save$Interinstitutional_Code6[docvars.save$Interinstitutional_Code6==""] <- NA
docvars.save$Interinstitutional_Code7[docvars.save$Interinstitutional_Code7==""] <- NA

docvars.save$Vote1Rule[docvars.save$Vote1Rule=="QMV"] <- "Qualified majority"
docvars.save$Vote2Rule[docvars.save$Vote2Rule=="QMV"] <- "Qualified majority"
docvars.save$Vote3Rule[docvars.save$Vote3Rule=="QMV"] <- "Qualified majority"
docvars.save$Vote4Rule[docvars.save$Vote4Rule=="QMV"] <- "Qualified majority"
docvars.save$Vote5Rule[docvars.save$Vote5Rule=="QMV"] <- "Qualified majority"
docvars.save$Vote6Rule[docvars.save$Vote6Rule=="QMV"] <- "Qualified majority"
docvars.save$Vote7Rule[docvars.save$Vote7Rule=="QMV"] <- "Qualified majority"
docvars.save$Vote8Rule[docvars.save$Vote8Rule=="QMV"] <- "Qualified majority"
docvars.save$Vote9Rule[docvars.save$Vote9Rule=="QMV"] <- "Qualified majority"
docvars.save$Vote10Rule[docvars.save$Vote10Rule=="QMV"] <- "Qualified majority"
docvars.save$Vote11Rule[docvars.save$Vote11Rule=="QMV"] <- "Qualified majority"
docvars.save$Vote12Rule[docvars.save$Vote12Rule=="QMV"] <- "Qualified majority"
docvars.save$Vote13Rule[docvars.save$Vote13Rule=="QMV"] <- "Qualified majority"
docvars.save$Vote14Rule[docvars.save$Vote14Rule=="QMV"] <- "Qualified majority"
docvars.save$Vote15Rule[docvars.save$Vote15Rule=="QMV"] <- "Qualified majority"

docvars.save <- docvars.save %>% 
  select(Transcription,Council_Meeting,Place,Date,
         Debate_Number,Debate_Topic,Negotiation_Stage,Actor,Council_Config_final,
         date_correct,actor_date,distance_elect_planned,
         final_two_months,unanimity_any,north_south,
         large_small,actor_meeting,budget_any,year,actor_year,
         actor_month,bond_yields,gov_eu_cmp_static,gov_lr_cmp_static_scaled,
         cabinet_name,gov_eu_supporter,ches_eu_position,
         gov_eu_supporter_ches,ches_cmp_comb,image_lag6m_scaled,unemployment_scaled,
         inflation_scaled,eu_receipts_gdp_scaled,number_words,starts_with("Interinstitutional"),
         starts_with("Document"),starts_with("Vote"))
 
docvars(council_all_nopres)=docvars.save

save(council_all_nopres,file="generated_data/corpus_final.RData")
